In [1]:
import pandas as pd
In [2]:
df = pd.read_csv("AB_NYC_2019.csv")
In [3]:
df.head(2)
Out[3]:
id name host_id host_name neighbourhood_group neighbourhood latitude longitude room_type price minimum_nights number_of_reviews last_review reviews_per_month calculated_host_listings_count availability_365
0 2539 Clean & quiet apt home by the park 2787 John Brooklyn Kensington 40.64749 -73.97237 Private room 149 1 9 19-10-2018 0.21 6 365
1 2595 Skylit Midtown Castle 2845 Jennifer Manhattan Midtown 40.75362 -73.98377 Entire home/apt 225 1 45 21-05-2019 0.38 2 355
In [4]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48906 entries, 0 to 48905
Data columns (total 16 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   id                              48906 non-null  int64  
 1   name                            48890 non-null  object 
 2   host_id                         48906 non-null  int64  
 3   host_name                       48885 non-null  object 
 4   neighbourhood_group             48906 non-null  object 
 5   neighbourhood                   48906 non-null  object 
 6   latitude                        48906 non-null  float64
 7   longitude                       48906 non-null  float64
 8   room_type                       48906 non-null  object 
 9   price                           48906 non-null  int64  
 10  minimum_nights                  48906 non-null  int64  
 11  number_of_reviews               48906 non-null  int64  
 12  last_review                     38854 non-null  object 
 13  reviews_per_month               38854 non-null  float64
 14  calculated_host_listings_count  48906 non-null  int64  
 15  availability_365                48906 non-null  int64  
dtypes: float64(3), int64(7), object(6)
memory usage: 6.0+ MB
In [5]:
df["id"] = df["id"].astype(str)
In [6]:
df["id"].dtype
Out[6]:
dtype('O')
In [7]:
df["host_id"] = df["host_id"].astype(str)
In [8]:
df["last_review"] = pd.to_datetime(df["last_review"])
C:\Users\Satyam\AppData\Local\Temp\ipykernel_12044\3465608367.py:1: UserWarning: Parsing dates in %d-%m-%Y format when dayfirst=False (the default) was specified. Pass `dayfirst=True` or specify a format to silence this warning.
  df["last_review"] = pd.to_datetime(df["last_review"])
In [9]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48906 entries, 0 to 48905
Data columns (total 16 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   id                              48906 non-null  object        
 1   name                            48890 non-null  object        
 2   host_id                         48906 non-null  object        
 3   host_name                       48885 non-null  object        
 4   neighbourhood_group             48906 non-null  object        
 5   neighbourhood                   48906 non-null  object        
 6   latitude                        48906 non-null  float64       
 7   longitude                       48906 non-null  float64       
 8   room_type                       48906 non-null  object        
 9   price                           48906 non-null  int64         
 10  minimum_nights                  48906 non-null  int64         
 11  number_of_reviews               48906 non-null  int64         
 12  last_review                     38854 non-null  datetime64[ns]
 13  reviews_per_month               38854 non-null  float64       
 14  calculated_host_listings_count  48906 non-null  int64         
 15  availability_365                48906 non-null  int64         
dtypes: datetime64[ns](1), float64(3), int64(5), object(7)
memory usage: 6.0+ MB